#Mayya Komisarchik
#Gov 2001 Chen 2012 Final Paper Code
############################################################################################
#packages and setup
  require(foreign)
  require(sampleSelection)
  require(devtools)
  require(MatchingFrontier)
  require(cem)
  library(MatchIt)
  require(Zelig)
  require(xtable)
  library(mvtnorm)
  require(dummies)
#clear workspace
  rm(list=ls())

############################################################################################
#Setup  
############################################################################################
#Load Data  
  #Change these directories to load your data
    indiv<-read.dta("/Users/MayyaKomisarchik/Documents/Courses/Spring 2014/Gov 2001/Replication Paper/Data/Chen.Individual.Data.dta")
    precinct<-read.table("http://www-personal.umich.edu/~jowei/fema/Chen.Precinct.Data.txt", header=T)

#Data Diagnostics
    demapplicants<-nrow(indiv[indiv$applied ==1 & indiv$democrat==1 & indiv$date<=41104,])
    repapplicants<-nrow(indiv[indiv$applied ==1 & indiv$republican==1 & indiv$date<=41104,])

  #Missing values in treatment variable indicator
    nrow(indiv)-sum(is.na(indiv$Aid_awarded)) #3,302,532, which means 309,408 have data for whether or not they were treated
    sum(indiv$applied[indiv$applied==1])

  #Generate a subset of individuals who applied for aid before November 2004
    #This should be the sum of Democratic and Republican applicants. This matches Chen's appendix table 
    #http://www-personal.umich.edu/~jowei/fema/OnlineAppendix.pdf
    #We aren't worried about third parties in this case
      pelect<-indiv[indiv$applied ==1 & indiv$date <=41104,]

    #Spit out a .csv of this file for the RCE match
      setwd("/Users/MayyaKomisarchik/Documents/Courses/Spring 2014/Gov 2001/Replication Paper/Data/")
      write.csv(pelect, file = "applicants.csv")
    #Code to Load This up on the RCE (comment out for general run)
      #pelect<-read.csv("/nfs/home/M/mkomisarchik/Replication Paper/Data/applicants.csv",row.names=1)

############################################################################################
#Propensity Scores
############################################################################################
#I am not using these to match the data - in fact, we're going to use Mahalanobis distance and get an L1 match per Chris' paper
#This is just for the purpose of visualizing the probability to get assigned to treatment by group. These look to be relatively
#well balanced so far.
  pscorereg<-glm(Aid_awarded ~ age + mph + ageSQUARED + mphSQUARED + republican +male + black + CountyCode+
                      + medincome100k + medhomevalue100k,
                      family = binomial("logit"),
                      data = pelect)

#Treatment and Control Fitted Values
  propscores<-pscorereg$fitted
  propscoret<-propscores[pelect$Aid_awarded==1]
  propscorec<-propscores[pelect$Aid_awarded==0]

############################################################################################
#Propensity Score Graphics
############################################################################################
  setwd("/Users/MayyaKomisarchik/Documents/Courses/Spring 2014/Gov 2001/Replication Paper/Code/Final Paper Code/Code")
  pdf("T2002Pscore.pdf")
  par(mfrow=c(1,1))
  hist(propscores[pelect$Turnout2002==1],main="Propensity Scores for Voters and \n Non-Voters in the \n 2002 November General Election",xlab="Propensity Sore",col=rgb(1, 0, 0, 1))
  hist(propscores[pelect$Turnout2002==0],main="Histogram of Propensity Scores",xlab="Propensity Sore",col=rgb(0, 0,1, 0.7),add=T)
  legend('topleft',c('Voted in November 2002 General Election','Did Not Vote in November 2002 General Election'),
       fill = rgb(1:0,0,0:1,1:0.7), bty = 'n',
       border = NA,cex=.65)
  dev.off()
############################################################################################
#Balance, Matching, CEM FSATT
############################################################################################
#CEM Match
    #This is diagnostic code designed to tell me why the CEM match was breaking with a long list of concatenated drops
    #for(i in 1:100){
      #drops <- colnames(pelect)[!(1:length(colnames(pelect)) %in% sample(1:length(colnames(pelect)), 5))]
      #cem.match <- cem(treatment = "Aid_awarded", data = pelect, 
                       #drop = drops)
      #print(i)
      #print(colnames(pelect)[!(colnames(pelect) %in% drops)])
      #matched <- sum(cem.match$matched)
      #print(paste('total matches:', matched))
      #if(matched > 0){break}
    #}

  keeps <- c('mph', 'male', 'black', 'Turnout2002','republican','medincome10k','medhomevalue10k','age') 
  drops <- colnames(pelect)[!(colnames(pelect) %in% keeps)]
  
  
  #Generate cem.match object. This takes forever, which is why I'm saving it below
    cem.match <- cem(treatment = "Aid_awarded", data = pelect, eval.imbalance=T,
                     drop = drops)
    setwd("/Users/MayyaKomisarchik/Documents/Courses/Spring 2014/Gov 2001/Replication Paper/Data")
    #save.image(cem.RData)
      #load("/Users/MayyaKomisarchik/Documents/Courses/Spring 2014/Gov 2001/Replication Paper/Data/cem.RData")

    cem.match.att.mod6 <- att(obj=cem.match, formula=Turnout2004~Turnout2002+Aid_awarded+I(Aid_awarded*(Party=="REP"))+I(Party=="REP")+mph+mphSQUARED+age+ageSQUARED+medhomevalue10k+(Gender=="M")+black+medincome10k+as.factor(CountyCode),
                     data = pelect, model="logit")
    xtable(summary(cem.match.att.mod6))

  #Balance checks and pre- and post- CEM imbalance
    #Differences in means for important covariates, before CEM
    #Republicans
      test.rep <- t.test(pelect$republican[pelect$Aid_awarded == 1], pelect$republican[pelect$Aid_awarded == 0])
      test.rep$estimate
      test.rep$p.value

    #Voted in 2002
      test.2002 <- t.test(pelect$Turnout2002[pelect$Aid_awarded == 1], pelect$Turnout2002[pelect$Aid_awarded == 0])
      test.2002$estimate
      test.2002$p.value

    #mph
      test.mph <- t.test(pelect$mph[pelect$Aid_awarded == 1], pelect$mph[pelect$Aid_awarded == 0])
      test.mph$estimate
      test.mph$p.value

    #mph squared
      test.mphsqrd <- t.test(pelect$mphSQUARED[pelect$Aid_awarded == 1], pelect$mphSQUARED[pelect$Aid_awarded == 0])
      test.mphsqrd$estimate
      test.mphsqrd$p.value

    #age
      test.age <- t.test(pelect$age[pelect$Aid_awarded == 1], pelect$age[pelect$Aid_awarded == 0])
      test.age$estimate
      test.age$p.value

    #age squared
      test.agesqrd <- t.test(pelect$ageSQUARED[pelect$Aid_awarded == 1], pelect$ageSQUARED[pelect$Aid_awarded == 0])
      test.agesqrd$estimate
      test.agesqrd$p.value

    #income
      test.income <- t.test(pelect$medincome10k[pelect$Aid_awarded == 1], pelect$medincome10k[pelect$Aid_awarded == 0])
      test.income$estimate
      test.income$p.value

    #home value
      test.homeval <- t.test(pelect$medhomevalue10k[pelect$Aid_awarded == 1], pelect$medhomevalue10k[pelect$Aid_awarded == 0])
      test.homeval$estimate
      test.homeval$p.value

    #male
      test.male <- t.test(pelect$male[pelect$Aid_awarded == 1], pelect$male[pelect$Aid_awarded == 0])
      test.male$estimate
      test.male$p.value

    #black
      test.black <- t.test(pelect$black[pelect$Aid_awarded == 1], pelect$black[pelect$Aid_awarded == 0])
      test.black$estimate
      test.black$p.value

############################################################################################
#Balance Plot for T Stats
############################################################################################
  #Varnames for dotcharts
    names<-c()
  #Generate a vector of t stats for above diff in means in original
    tstats<-c(test.rep$statistic,test.2002$statistic,test.mph$statistic,test.mphsqrd$statistic,
              test.age$statistic, test.agesqrd$statistic,test.income$statistic,test.homeval$statistic,test.male$statistic, test.black$statistic)
    setwd("/Users/MayyaKomisarchik/Documents/Courses/Spring 2014/Gov 2001/Replication Paper/Code/Final Paper Code/Code")
    pdf("DotBefore.pdf")
    par(mfrow=c(1,1))
    dotchart(tstats, main="Studentized Differences in Means for \n Florida Voters Awarded FEMA Aid \n and Voters Not Awarded FEMA Aid",
         labels=c("Registered Republican","Voted in November \n 2002 General Election",
                  "Maximum Wind Speed\n (Miles per Hour)","Maximum Wind Speed \n (Miles per Hour Squared)",
                  "Voter's Age (Years)","Voter's Age \n (Years Squared)","Med. Household Income \n in Block Group ($10,000s)",
                  "Med. Home Value in \nBlock Group ($10,000s)","Voter's Gender (Male)","African American"),
                  cex = .9,xlab="Diff. in Means",pch=19)
          abline(v=0, col="red", lty=2)
    dev.off()
  #Multiply by CEM weights
      cemrep<-mean(pelect$republican[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0])
      cem2002<-mean(pelect$Turnout2002[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0])
      cemmph<-mean(pelect$mph[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0])
      cemmphsqrd<-mean(pelect$mphSQUARED[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0])
      cemage<-mean(pelect$age[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0])
      cemagesqrd<-mean(pelect$ageSQUARED[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0])
      cemincome<-mean(pelect$medincome10k[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0])
      cemhomeval<-mean(pelect$medhomevalue10k[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0])
      cemmale<-mean(pelect$male[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0])      
      cemblack<-mean(pelect$black[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0])

    #Differences in means for important covariates, post CEM
    #Republicans
      test.repcem <- t.test(pelect$republican[pelect$Aid_awarded == 1 &cem.match$matched==TRUE], (pelect$republican[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0]))
      test.repcem$estimate
      test.repcem$p.value
    
    #Voted in 2002
      test.2002cem <- t.test(pelect$Turnout2002[pelect$Aid_awarded == 1 &cem.match$matched==TRUE], (pelect$Turnout2002[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0]))
      test.2002cem$estimate
      test.2002cem$p.value
    
    #mph
      test.mphcem <- t.test(pelect$mph[pelect$Aid_awarded == 1 &cem.match$matched==TRUE], (pelect$mph[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0]))
      test.mphcem$estimate
      test.mphcem$p.value
    
    #mph squared
      test.mphsqrdcem <- t.test(pelect$mphSQUARED[pelect$Aid_awarded == 1 &cem.match$matched==TRUE], (pelect$mphSQUARED[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0]))
      test.mphsqrdcem$estimate
      test.mphsqrdcem$p.value
    
    #age
      test.agecem <- t.test(pelect$age[pelect$Aid_awarded == 1 &cem.match$matched==TRUE], (pelect$age[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0]))
      test.agecem$estimate
      test.agecem$p.value
    
    #age squared
      test.agesqrdcem <- t.test(pelect$ageSQUARED[pelect$Aid_awarded == 1 &cem.match$matched==TRUE], (pelect$ageSQUARED[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0]))
      test.agesqrdcem$estimate
      test.agesqrdcem$p.value
    
    #income
      test.incomecem <- t.test(pelect$medincome10k[pelect$Aid_awarded == 1 &cem.match$matched==TRUE], (pelect$medincome10k[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0]))
      test.incomecem$estimate
      test.incomecem$p.value
    
    #home value
      test.homevalcem <- t.test(pelect$medhomevalue10k[pelect$Aid_awarded == 1 &cem.match$matched==TRUE], (pelect$medhomevalue10k[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0]))
      test.homevalcem$estimate
      test.homevalcem$p.value
    
    #male
      test.malecem <- t.test(pelect$male[pelect$Aid_awarded == 1 &cem.match$matched==TRUE], (pelect$male[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0]))
      test.malecem$estimate
      test.malecem$p.value
    
    #black
      test.blackcem <- t.test(pelect$black[pelect$Aid_awarded == 1 &cem.match$matched==TRUE], (pelect$black[pelect$Aid_awarded == 0&cem.match$matched ==TRUE]*cem.match$w[cem.match$matched==TRUE & pelect$Aid_awarded==0]))
      test.blackcem$estimate
      test.blackcem$p.value

#Generate a vector of t stats for above diff in means in original
  tstatscem<-c(test.repcem$statistic,test.2002cem$statistic,test.mphcem$statistic,test.mphsqrdcem$statistic,
          test.agecem$statistic, test.agesqrdcem$statistic,test.incomecem$statistic,test.homevalcem$statistic,test.malecem$statistic, test.blackcem$statistic)
  setwd("/Users/MayyaKomisarchik/Documents/Courses/Spring 2014/Gov 2001/Replication Paper/Code/Final Paper Code/Code")
  pdf("DotAfter.pdf")
  par(mfrow=c(1,1))
  dotchart(tstatscem, main="Studentized Differences in CEM-Weighted Means for \n Florida Voters Awarded FEMA Aid \n and Voters Not Awarded FEMA Aid",
           labels=c("Registered Republican","Voted in November \n 2002 General Election",
                    "Maximum Wind Speed\n (Miles per Hour)","Maximum Wind Speed \n (Miles per Hour Squared)",
                    "Voter's Age (Years)","Voter's Age \n (Years Squared)","Med. Household Income \n in Block Group ($10,000s)",
                    "Med. Home Value in \nBlock Group ($10,000s)","Voter's Gender (Male)","African American"),
           cex = .9,xlab="Diff. in Means",pch=19,xlim=c(-2,2))
        abline(v=0, col="red", lty=2)
  dev.off()

############################################################################################
#Simulations from Matched Data, Research Extension (not in paper)
############################################################################################  
#Subset data to vars from the CEM ATT estimate
  xcem<-subset(pelect, select=c("Turnout2002", "Turnout2004","Aid_awarded",
                                "republican","mph","mphSQUARED","age",
                                "ageSQUARED","medhomevalue10k","medincome10k","male","black","CountyCode","Party","Gender"))


#Rename cem variables; don't want double dollar signs in the subsequent calls
  cemweights<-cem.match$w
  matches<-cem.match$matched
#Include dummies for county variables along with CEM weights and matches for manual weighting
  xcem<-cbind(xcem, cemweights,matches)

#Subset to matched observations exclusively. We're going to drop everything without a match per CEM
  xcem <- subset(xcem, xcem$matches==TRUE)

#Reorder columns to have the variables in which we'll be changing things in the front
  xcem <- xcem[c(2,3,4,11,12,1,5,6,7,8,9,10,16,17,13,14,15)]

#Split xcem into treatment and control variables
  control<-subset(xcem, xcem$Aid_awarded==0)
  active<-subset(xcem, xcem$Aid_awarded==1)

#Apply cem weights to control
  #Drop outcome and treatment
    controlcovars<-control[,3:17]
  #Multiply all covariates by cem weights
    controlweights<-controlcovars$cemweights
      mat <- matrix(, nrow = nrow(controlcovars), ncol = ncol(controlcovars)-3)
      mat<-as.data.frame(mat)
      for(column in 1:ncol(mat)){
      mat[, column] <- controlcovars[,column]*controlweights
      }
      colnames(mat)<-colnames(controlcovars[1:12])
  #Add outcome and treatment back in
    Aidaward<-control$Aid_awarded
    Turnout2004<-control$Turnout2004
    counties<-control$CountyCode
    Party<-control$Party
    Gender<-control$Gender
    control<-cbind(Turnout2004,Aidaward,mat,counties,Party,Gender)
    colnames(control) <- colnames(xcem)
  #Bind in treatment data
    cemdata<-rbind(control,active)

#Run logit on manually weighted data
  cemreg<-glm(Turnout2004~Turnout2002+Aid_awarded+I(Aid_awarded*(Party=="REP"))+I(Party=="REP")+mph+mphSQUARED+age+ageSQUARED+medhomevalue10k+(Gender=="M")+black+medincome10k+as.factor(CountyCode), 
              family=binomial(link='logit'), 
              data=cemdata)

#Simulate predicted values using inverse logit
  ctydummies<-dummy(cemdata$CountyCode)
  cemdatadum<-cbind(cemdata,ctydummies)
#Simulated betas
  set.seed(1234)
  sim.betas <- rmvnorm(n = 10000, mean = cemreg$coefficients,
                     sigma = vcov(cemreg))

#Create vectors of set covariates
  #Female democrat untreated
    femdemu<-apply(cemdata[2:12],2,mean)
    femdemu["male"]<-0
    femdemu["republican"]<-0
    femdemu["Aid_awarded"]<-0
    femdemu<-c(1,femdemu,0)
  #Female democrat untreated
    femdemt<-apply(cemdata[2:12],2,mean)
    femdemt["male"]<-0
    femdemt["republican"]<-0
    femdemt["Aid_awarded"]<-1
    femdemt<-c(1,femdemt,0)
  #Female republican untreated
    femrepu<-apply(cemdata[2:12],2,mean)
    femrepu["male"]<-0
    femrepu["republican"]<-1
    femrepu["Aid_awarded"]<-0
    femrepu<-c(1,femrepu,0)
#Female republican untreated
    femrept<-apply(cemdata[2:12],2,mean)
    femrept["male"]<-0
    femrept["republican"]<-1
    femrept["Aid_awarded"]<-1
    femrept<-c(1,femrept,1)
  #Male democrat untreated
    maledemu<-apply(cemdata[2:12],2,mean)
    maledemu["male"]<-1
    maledemu["republican"]<-0
    maledemu["Aid_awarded"]<-0
    maledemu<-c(1,maledemu,0)
  #Male democrat untreated
    maledemt<-apply(cemdata[2:12],2,mean)
    maledemt["male"]<-1
    maledemt["republican"]<-0
    maledemt["Aid_awarded"]<-1
    maledemt<-c(1,maledemt,0)
  #Male republican untreated
    malerepu<-apply(cemdata[2:12],2,mean)
    malerepu["male"]<-1
    malerepu["republican"]<-1
    malerepu["Aid_awarded"]<-0
    malerepu<-c(1,malerepu,0)
#Male republican treated
    malerept<-apply(cemdata[2:12],2,mean)
    malerept["male"]<-1
    malerept["republican"]<-1
    malerept["Aid_awarded"]<-1
    malerept<-c(1,malerept,1)
  #Black democrat untreated
    blackdemu<-apply(cemdata[2:12],2,mean)
    blackdemu["black"]<-1
    blackdemu["republican"]<-0
    blackdemu["Aid_awarded"]<-0
    blackdemu<-c(1,blackdemu,0)
    #Black democrat treated
    blackdemt<-apply(cemdata[2:12],2,mean)
    blackdemt["black"]<-1
    blackdemt["republican"]<-0
    blackdemt["Aid_awarded"]<-1
    blackdemt<-c(1,blackdemt,0)
  #black republican untreated
    blackrepu<-apply(cemdata[2:12],2,mean)
    blackrepu["black"]<-1
    blackrepu["republican"]<-1
    blackrepu["Aid_awarded"]<-0
    blackrepu<-c(1,blackrepu,0)
  #black republican untreated
    blackrept<-apply(cemdata[2:12],2,mean)
    blackrept["black"]<-1
    blackrept["republican"]<-1
    blackrept["Aid_awarded"]<-1
    blackrept<-c(1,blackrept,1)

  #Change column order of sim.betas, drop dummies
    sim.betas<-as.data.frame(sim.betas[,1:13])
    sim.betas<-sim.betas[c(1,3,5,11,12,2,6,7,8,9,10,13,4)]
    
    fd.femdem <- c(inv.logit(femdemt %*% t(sim.betas)))-c(inv.logit(femdemu %*% t(sim.betas)))
    mean(fd.femdem)

    fd.femrep <- c(inv.logit(femrept %*% t(sim.betas)))-c(inv.logit(femrepu %*% t(sim.betas)))
    mean(fd.femrep)

    fd.maledem <- c(inv.logit(maledemt %*% t(sim.betas)))-c(inv.logit(maledemu %*% t(sim.betas)))
    mean(fd.maledem)
    
    fd.malerep <- c(inv.logit(malerept %*% t(sim.betas)))-c(inv.logit(malerepu %*% t(sim.betas)))
    mean(fd.malerep)

    fd.blackrep <- c(inv.logit(blackrept %*% t(sim.betas)))-c(inv.logit(blackrepu %*% t(sim.betas)))
    mean(fd.blackrep)

    fd.blackdem <- c(inv.logit(blackdemt %*% t(sim.betas)))-c(inv.logit(blackdemu %*% t(sim.betas)))
    mean(fd.blackdem)
